{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Trigger a run from a notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[NeMo W 2024-08-29 17:14:25 nemo_logging:349] /Users/romeyn/base/code/.venv/lib/python3.10/site-packages/megatron/core/optimizer/__init__.py:18: UserWarning: Transformer Engine and Apex are not installed. Falling back to Torch optimizers.\n",
      "      warnings.warn(\n",
      "    \n",
      "[NeMo W 2024-08-29 17:14:25 nemo_logging:349] /Users/romeyn/base/code/.venv/lib/python3.10/site-packages/megatron/core/optimizer/clip_grads.py:31: UserWarning: Transformer Engine and Apex are not installed. Falling back to local implementations of multi_tensor_applier, multi_tensor_l2norm, and multi_tensor_scale\n",
      "      warnings.warn(\n",
      "    \n"
     ]
    }
   ],
   "source": [
    "# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.\n",
    "#\n",
    "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
    "# you may not use this file except in compliance with the License.\n",
    "# You may obtain a copy of the License at\n",
    "#\n",
    "#     http://www.apache.org/licenses/LICENSE-2.0\n",
    "#\n",
    "# Unless required by applicable law or agreed to in writing, software\n",
    "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
    "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
    "# See the License for the specific language governing permissions and\n",
    "# limitations under the License.\n",
    "\n",
    "import nemo_run as run\n",
    "from nemo.collections import llm\n",
    "from nemo.collections.llm.recipes import llama3_8b\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
       "<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
       " \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
       "<!-- Generated by graphviz version 11.0.0 (20240428.1522)\n",
       " -->\n",
       "<!-- Pages: 1 -->\n",
       "<svg width=\"1905pt\" height=\"852pt\"\n",
       " viewBox=\"0.00 0.00 1904.50 851.50\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
       "<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 847.5)\">\n",
       "<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-847.5 1900.5,-847.5 1900.5,4 -4,4\"/>\n",
       "<!-- 2 -->\n",
       "<g id=\"node1\" class=\"node\">\n",
       "<title>2</title>\n",
       "<polygon fill=\"#fff8dc\" stroke=\"none\" points=\"0,-187.5 0,-207.5 129,-207.5 129,-187.5 0,-187.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-187.5 0,-207.5 129,-207.5 129,-187.5 0,-187.5\"/>\n",
       "<text text-anchor=\"start\" x=\"3.75\" y=\"-195\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"35.25\" y=\"-195\" font-family=\"Courier,monospace\" font-size=\"10.00\"> Llama3Config8B</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"0,-167.5 0,-187.5 129,-187.5 129,-167.5 0,-167.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"0,-167.5 0,-187.5 129,-187.5 129,-167.5 0,-167.5\"/>\n",
       "<text text-anchor=\"start\" x=\"28.5\" y=\"-175\" font-family=\"Courier,monospace\" font-style=\"italic\" font-size=\"10.00\">no arguments</text>\n",
       "</g>\n",
       "<!-- 1 -->\n",
       "<g id=\"node2\" class=\"node\">\n",
       "<title>1</title>\n",
       "<polygon fill=\"#90ee90\" stroke=\"none\" points=\"85,-500.5 85,-520.5 190,-520.5 190,-500.5 85,-500.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"85,-500.5 85,-520.5 190,-520.5 190,-500.5 85,-500.5\"/>\n",
       "<text text-anchor=\"start\" x=\"88.75\" y=\"-508\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"120.25\" y=\"-508\" font-family=\"Courier,monospace\" font-size=\"10.00\"> LlamaModel</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"85,-480.5 85,-500.5 151,-500.5 151,-480.5 85,-480.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"85,-480.5 85,-500.5 151,-500.5 151,-480.5 85,-480.5\"/>\n",
       "<text text-anchor=\"start\" x=\"111\" y=\"-487\" font-family=\"Courier,monospace\" font-size=\"10.00\">config</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"151,-480.5 151,-500.5 190,-500.5 190,-480.5 151,-480.5\"/>\n",
       "<polygon fill=\"#fff8dc\" stroke=\"none\" points=\"155,-484.5 155,-496.5 186,-496.5 186,-484.5 155,-484.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"155,-484.5 155,-496.5 186,-496.5 186,-484.5 155,-484.5\"/>\n",
       "</g>\n",
       "<!-- 1&#45;&#45;2 -->\n",
       "<g id=\"edge1\" class=\"edge\">\n",
       "<title>1:c&#45;&#45;2:c</title>\n",
       "<path fill=\"none\" stroke=\"#cbc6b0\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M168.83,-484.77C157.56,-452.76 92.23,-267.24 71.09,-207.22\"/>\n",
       "</g>\n",
       "<!-- 0 -->\n",
       "<g id=\"node3\" class=\"node\">\n",
       "<title>0</title>\n",
       "<polygon fill=\"#ffc0cb\" stroke=\"none\" points=\"640.5,-823.5 640.5,-843.5 738.5,-843.5 738.5,-823.5 640.5,-823.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" stroke-dasharray=\"5,2\" points=\"640.5,-823.5 640.5,-843.5 738.5,-843.5 738.5,-823.5 640.5,-823.5\"/>\n",
       "<text text-anchor=\"start\" x=\"644.5\" y=\"-831\" font-family=\"Courier,monospace\" font-size=\"8.00\">Partial:</text>\n",
       "<text text-anchor=\"start\" x=\"680.5\" y=\"-831\" font-family=\"Courier,monospace\" font-size=\"10.00\"> pretrain</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" stroke-dasharray=\"5,2\" points=\"640.5,-803.5 640.5,-823.5 706.5,-823.5 706.5,-803.5 640.5,-803.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"640.5,-803.5 640.5,-823.5 706.5,-823.5 706.5,-803.5 640.5,-803.5\"/>\n",
       "<text text-anchor=\"start\" x=\"672.5\" y=\"-810\" font-family=\"Courier,monospace\" font-size=\"10.00\">model</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"706.5,-803.5 706.5,-823.5 738.5,-823.5 738.5,-803.5 706.5,-803.5\"/>\n",
       "<polygon fill=\"#90ee90\" stroke=\"none\" points=\"710.5,-807.5 710.5,-819.5 734.5,-819.5 734.5,-807.5 710.5,-807.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"710.5,-807.5 710.5,-819.5 734.5,-819.5 734.5,-807.5 710.5,-807.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"640.5,-783.5 640.5,-803.5 706.5,-803.5 706.5,-783.5 640.5,-783.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"640.5,-783.5 640.5,-803.5 706.5,-803.5 706.5,-783.5 640.5,-783.5\"/>\n",
       "<text text-anchor=\"start\" x=\"678.5\" y=\"-790\" font-family=\"Courier,monospace\" font-size=\"10.00\">data</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"706.5,-783.5 706.5,-803.5 738.5,-803.5 738.5,-783.5 706.5,-783.5\"/>\n",
       "<polygon fill=\"#ffa07a\" stroke=\"none\" points=\"710.5,-787.5 710.5,-799.5 734.5,-799.5 734.5,-787.5 710.5,-787.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"710.5,-787.5 710.5,-799.5 734.5,-799.5 734.5,-787.5 710.5,-787.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"640.5,-763.5 640.5,-783.5 706.5,-783.5 706.5,-763.5 640.5,-763.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"640.5,-763.5 640.5,-783.5 706.5,-783.5 706.5,-763.5 640.5,-763.5\"/>\n",
       "<text text-anchor=\"start\" x=\"660.5\" y=\"-770\" font-family=\"Courier,monospace\" font-size=\"10.00\">trainer</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"706.5,-763.5 706.5,-783.5 738.5,-783.5 738.5,-763.5 706.5,-763.5\"/>\n",
       "<polygon fill=\"#add8e6\" stroke=\"none\" points=\"710.5,-767.5 710.5,-779.5 734.5,-779.5 734.5,-767.5 710.5,-767.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"710.5,-767.5 710.5,-779.5 734.5,-779.5 734.5,-767.5 710.5,-767.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"640.5,-743.5 640.5,-763.5 706.5,-763.5 706.5,-743.5 640.5,-743.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"640.5,-743.5 640.5,-763.5 706.5,-763.5 706.5,-743.5 640.5,-743.5\"/>\n",
       "<text text-anchor=\"start\" x=\"684.5\" y=\"-750\" font-family=\"Courier,monospace\" font-size=\"10.00\">log</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"706.5,-743.5 706.5,-763.5 738.5,-763.5 738.5,-743.5 706.5,-743.5\"/>\n",
       "<polygon fill=\"#db7093\" stroke=\"none\" points=\"710.5,-747.5 710.5,-759.5 734.5,-759.5 734.5,-747.5 710.5,-747.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"710.5,-747.5 710.5,-759.5 734.5,-759.5 734.5,-747.5 710.5,-747.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"640.5,-723.5 640.5,-743.5 706.5,-743.5 706.5,-723.5 640.5,-723.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"640.5,-723.5 640.5,-743.5 706.5,-743.5 706.5,-723.5 640.5,-723.5\"/>\n",
       "<text text-anchor=\"start\" x=\"666.5\" y=\"-730\" font-family=\"Courier,monospace\" font-size=\"10.00\">resume</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"706.5,-723.5 706.5,-743.5 738.5,-743.5 738.5,-723.5 706.5,-723.5\"/>\n",
       "<polygon fill=\"#00bfff\" stroke=\"none\" points=\"710.5,-727.5 710.5,-739.5 734.5,-739.5 734.5,-727.5 710.5,-727.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"710.5,-727.5 710.5,-739.5 734.5,-739.5 734.5,-727.5 710.5,-727.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"640.5,-703.5 640.5,-723.5 706.5,-723.5 706.5,-703.5 640.5,-703.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"640.5,-703.5 640.5,-723.5 706.5,-723.5 706.5,-703.5 640.5,-703.5\"/>\n",
       "<text text-anchor=\"start\" x=\"672.5\" y=\"-710\" font-family=\"Courier,monospace\" font-size=\"10.00\">optim</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"706.5,-703.5 706.5,-723.5 738.5,-723.5 738.5,-703.5 706.5,-703.5\"/>\n",
       "<polygon fill=\"#7b68ee\" stroke=\"none\" points=\"710.5,-707.5 710.5,-719.5 734.5,-719.5 734.5,-707.5 710.5,-707.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"710.5,-707.5 710.5,-719.5 734.5,-719.5 734.5,-707.5 710.5,-707.5\"/>\n",
       "</g>\n",
       "<!-- 0&#45;&#45;1 -->\n",
       "<g id=\"edge2\" class=\"edge\">\n",
       "<title>0:c&#45;&#45;1:c</title>\n",
       "<path fill=\"none\" stroke=\"#73be73\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M710.73,-810.4C659.33,-800.82 450.59,-757.86 305.5,-667.5 239.2,-626.21 178.54,-554.45 151.84,-520.43\"/>\n",
       "</g>\n",
       "<!-- 3 -->\n",
       "<g id=\"node4\" class=\"node\">\n",
       "<title>3</title>\n",
       "<polygon fill=\"#ffa07a\" stroke=\"none\" points=\"314.5,-520.5 314.5,-540.5 456.5,-540.5 456.5,-520.5 314.5,-520.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"314.5,-520.5 314.5,-540.5 456.5,-540.5 456.5,-520.5 314.5,-520.5\"/>\n",
       "<text text-anchor=\"start\" x=\"324.75\" y=\"-528\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"356.25\" y=\"-528\" font-family=\"Courier,monospace\" font-size=\"10.00\"> MockDataModule</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"314.5,-500.5 314.5,-520.5 424.5,-520.5 424.5,-500.5 314.5,-500.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"314.5,-500.5 314.5,-520.5 424.5,-520.5 424.5,-500.5 314.5,-500.5\"/>\n",
       "<text text-anchor=\"start\" x=\"360.5\" y=\"-507\" font-family=\"Courier,monospace\" font-size=\"10.00\">seq_length</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"424.5,-500.5 424.5,-520.5 456.5,-520.5 456.5,-500.5 424.5,-500.5\"/>\n",
       "<text text-anchor=\"start\" x=\"428.5\" y=\"-507\" font-family=\"Courier,monospace\" font-size=\"10.00\">8192</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"314.5,-480.5 314.5,-500.5 424.5,-500.5 424.5,-480.5 314.5,-480.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"314.5,-480.5 314.5,-500.5 424.5,-500.5 424.5,-480.5 314.5,-480.5\"/>\n",
       "<text text-anchor=\"start\" x=\"324.5\" y=\"-487\" font-family=\"Courier,monospace\" font-size=\"10.00\">micro_batch_size</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"424.5,-480.5 424.5,-500.5 456.5,-500.5 456.5,-480.5 424.5,-480.5\"/>\n",
       "<text text-anchor=\"start\" x=\"428.5\" y=\"-487\" font-family=\"Courier,monospace\" font-size=\"10.00\">1</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"314.5,-460.5 314.5,-480.5 424.5,-480.5 424.5,-460.5 314.5,-460.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"314.5,-460.5 314.5,-480.5 424.5,-480.5 424.5,-460.5 314.5,-460.5\"/>\n",
       "<text text-anchor=\"start\" x=\"318.5\" y=\"-467\" font-family=\"Courier,monospace\" font-size=\"10.00\">global_batch_size</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"424.5,-460.5 424.5,-480.5 456.5,-480.5 456.5,-460.5 424.5,-460.5\"/>\n",
       "<text text-anchor=\"start\" x=\"428.5\" y=\"-467\" font-family=\"Courier,monospace\" font-size=\"10.00\">512</text>\n",
       "</g>\n",
       "<!-- 0&#45;&#45;3 -->\n",
       "<g id=\"edge3\" class=\"edge\">\n",
       "<title>0:c&#45;&#45;3:c</title>\n",
       "<path fill=\"none\" stroke=\"#cb8061\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M710.63,-789.72C670.89,-779.87 540.79,-743.07 465.5,-667.5 429.94,-631.81 407.85,-577.2 396.12,-540.46\"/>\n",
       "</g>\n",
       "<!-- 4 -->\n",
       "<g id=\"node6\" class=\"node\">\n",
       "<title>4</title>\n",
       "<polygon fill=\"#add8e6\" stroke=\"none\" points=\"474.5,-647.5 474.5,-667.5 670.5,-667.5 670.5,-647.5 474.5,-647.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-647.5 474.5,-667.5 670.5,-667.5 670.5,-647.5 474.5,-647.5\"/>\n",
       "<text text-anchor=\"start\" x=\"532.75\" y=\"-655\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"564.25\" y=\"-655\" font-family=\"Courier,monospace\" font-size=\"10.00\"> Trainer</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-627.5 474.5,-647.5 620.5,-647.5 620.5,-627.5 474.5,-627.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-627.5 474.5,-647.5 620.5,-647.5 620.5,-627.5 474.5,-627.5\"/>\n",
       "<text text-anchor=\"start\" x=\"550.5\" y=\"-634\" font-family=\"Courier,monospace\" font-size=\"10.00\">accelerator</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-627.5 620.5,-647.5 670.5,-647.5 670.5,-627.5 620.5,-627.5\"/>\n",
       "<text text-anchor=\"start\" x=\"624.5\" y=\"-634\" font-family=\"Courier,monospace\" font-size=\"10.00\">&#39;gpu&#39;</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-607.5 474.5,-627.5 620.5,-627.5 620.5,-607.5 474.5,-607.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-607.5 474.5,-627.5 620.5,-627.5 620.5,-607.5 474.5,-607.5\"/>\n",
       "<text text-anchor=\"start\" x=\"478.5\" y=\"-614\" font-family=\"Courier,monospace\" font-size=\"10.00\">accumulate_grad_batches</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-607.5 620.5,-627.5 670.5,-627.5 670.5,-607.5 620.5,-607.5\"/>\n",
       "<text text-anchor=\"start\" x=\"624.5\" y=\"-614\" font-family=\"Courier,monospace\" font-size=\"10.00\">1</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-553.5 474.5,-607.5 620.5,-607.5 620.5,-553.5 474.5,-553.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-553.5 474.5,-607.5 620.5,-607.5 620.5,-553.5 474.5,-553.5\"/>\n",
       "<text text-anchor=\"start\" x=\"562.5\" y=\"-577.38\" font-family=\"Courier,monospace\" font-size=\"10.00\">callbacks</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-553.5 620.5,-607.5 670.5,-607.5 670.5,-553.5 620.5,-553.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"624.5,-582.75 624.5,-603.5 666.5,-603.5 666.5,-582.75 624.5,-582.75\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"624.5,-582.75 624.5,-603.5 666.5,-603.5 666.5,-582.75 624.5,-582.75\"/>\n",
       "<text text-anchor=\"start\" x=\"633.5\" y=\"-590\" font-family=\"Courier,monospace\" font-size=\"10.00\">list</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"624.5,-566.75 624.5,-582.75 666.5,-582.75 666.5,-566.75 624.5,-566.75\"/>\n",
       "<polygon fill=\"#ff8c00\" stroke=\"none\" points=\"628.5,-570.75 628.5,-578.75 662.5,-578.75 662.5,-570.75 628.5,-570.75\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"628.5,-570.75 628.5,-578.75 662.5,-578.75 662.5,-570.75 628.5,-570.75\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"624.5,-557.25 624.5,-566.75 666.5,-566.75 666.5,-557.25 624.5,-557.25\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"624.5,-557.25 624.5,-566.75 666.5,-566.75 666.5,-557.25 624.5,-557.25\"/>\n",
       "<text text-anchor=\"start\" x=\"643.62\" y=\"-560.05\" font-family=\"Courier,monospace\" font-size=\"6.00\">0</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-533.5 474.5,-553.5 620.5,-553.5 620.5,-533.5 474.5,-533.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-533.5 474.5,-553.5 620.5,-553.5 620.5,-533.5 474.5,-533.5\"/>\n",
       "<text text-anchor=\"start\" x=\"574.5\" y=\"-540\" font-family=\"Courier,monospace\" font-size=\"10.00\">devices</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-533.5 620.5,-553.5 670.5,-553.5 670.5,-533.5 620.5,-533.5\"/>\n",
       "<text text-anchor=\"start\" x=\"624.5\" y=\"-540\" font-family=\"Courier,monospace\" font-size=\"10.00\">8</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-513.5 474.5,-533.5 620.5,-533.5 620.5,-513.5 474.5,-513.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-513.5 474.5,-533.5 620.5,-533.5 620.5,-513.5 474.5,-513.5\"/>\n",
       "<text text-anchor=\"start\" x=\"514.5\" y=\"-520\" font-family=\"Courier,monospace\" font-size=\"10.00\">gradient_clip_val</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-513.5 620.5,-533.5 670.5,-533.5 670.5,-513.5 620.5,-513.5\"/>\n",
       "<text text-anchor=\"start\" x=\"624.5\" y=\"-520\" font-family=\"Courier,monospace\" font-size=\"10.00\">1.0</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-493.5 474.5,-513.5 620.5,-513.5 620.5,-493.5 474.5,-493.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-493.5 474.5,-513.5 620.5,-513.5 620.5,-493.5 474.5,-493.5\"/>\n",
       "<text text-anchor=\"start\" x=\"508.5\" y=\"-500\" font-family=\"Courier,monospace\" font-size=\"10.00\">limit_test_batches</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-493.5 620.5,-513.5 670.5,-513.5 670.5,-493.5 620.5,-493.5\"/>\n",
       "<text text-anchor=\"start\" x=\"624.5\" y=\"-500\" font-family=\"Courier,monospace\" font-size=\"10.00\">50</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-473.5 474.5,-493.5 620.5,-493.5 620.5,-473.5 474.5,-473.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-473.5 474.5,-493.5 620.5,-493.5 620.5,-473.5 474.5,-473.5\"/>\n",
       "<text text-anchor=\"start\" x=\"514.5\" y=\"-480\" font-family=\"Courier,monospace\" font-size=\"10.00\">limit_val_batches</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-473.5 620.5,-493.5 670.5,-493.5 670.5,-473.5 620.5,-473.5\"/>\n",
       "<text text-anchor=\"start\" x=\"624.5\" y=\"-480\" font-family=\"Courier,monospace\" font-size=\"10.00\">32</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-453.5 474.5,-473.5 620.5,-473.5 620.5,-453.5 474.5,-453.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-453.5 474.5,-473.5 620.5,-473.5 620.5,-453.5 474.5,-453.5\"/>\n",
       "<text text-anchor=\"start\" x=\"514.5\" y=\"-460\" font-family=\"Courier,monospace\" font-size=\"10.00\">log_every_n_steps</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-453.5 620.5,-473.5 670.5,-473.5 670.5,-453.5 620.5,-453.5\"/>\n",
       "<text text-anchor=\"start\" x=\"624.5\" y=\"-460\" font-family=\"Courier,monospace\" font-size=\"10.00\">10</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-433.5 474.5,-453.5 620.5,-453.5 620.5,-433.5 474.5,-433.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-433.5 474.5,-453.5 620.5,-453.5 620.5,-433.5 474.5,-433.5\"/>\n",
       "<text text-anchor=\"start\" x=\"562.5\" y=\"-440\" font-family=\"Courier,monospace\" font-size=\"10.00\">max_steps</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-433.5 620.5,-453.5 670.5,-453.5 670.5,-433.5 620.5,-433.5\"/>\n",
       "<text text-anchor=\"start\" x=\"624.5\" y=\"-440\" font-family=\"Courier,monospace\" font-size=\"10.00\">1168251</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-413.5 474.5,-433.5 620.5,-433.5 620.5,-413.5 474.5,-413.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-413.5 474.5,-433.5 620.5,-433.5 620.5,-413.5 474.5,-413.5\"/>\n",
       "<text text-anchor=\"start\" x=\"562.5\" y=\"-420\" font-family=\"Courier,monospace\" font-size=\"10.00\">num_nodes</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-413.5 620.5,-433.5 670.5,-433.5 670.5,-413.5 620.5,-413.5\"/>\n",
       "<text text-anchor=\"start\" x=\"624.5\" y=\"-420\" font-family=\"Courier,monospace\" font-size=\"10.00\">1</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-393.5 474.5,-413.5 620.5,-413.5 620.5,-393.5 474.5,-393.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-393.5 474.5,-413.5 620.5,-413.5 620.5,-393.5 474.5,-393.5\"/>\n",
       "<text text-anchor=\"start\" x=\"574.5\" y=\"-400\" font-family=\"Courier,monospace\" font-size=\"10.00\">plugins</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-393.5 620.5,-413.5 670.5,-413.5 670.5,-393.5 620.5,-393.5\"/>\n",
       "<polygon fill=\"#8fbc8f\" stroke=\"none\" points=\"624.5,-397.5 624.5,-409.5 666.5,-409.5 666.5,-397.5 624.5,-397.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"624.5,-397.5 624.5,-409.5 666.5,-409.5 666.5,-397.5 624.5,-397.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-373.5 474.5,-393.5 620.5,-393.5 620.5,-373.5 474.5,-373.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-373.5 474.5,-393.5 620.5,-393.5 620.5,-373.5 474.5,-373.5\"/>\n",
       "<text text-anchor=\"start\" x=\"568.5\" y=\"-380\" font-family=\"Courier,monospace\" font-size=\"10.00\">strategy</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-373.5 620.5,-393.5 670.5,-393.5 670.5,-373.5 620.5,-373.5\"/>\n",
       "<polygon fill=\"#ff6347\" stroke=\"none\" points=\"624.5,-377.5 624.5,-389.5 666.5,-389.5 666.5,-377.5 624.5,-377.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"624.5,-377.5 624.5,-389.5 666.5,-389.5 666.5,-377.5 624.5,-377.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-353.5 474.5,-373.5 620.5,-373.5 620.5,-353.5 474.5,-353.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-353.5 474.5,-373.5 620.5,-373.5 620.5,-353.5 474.5,-353.5\"/>\n",
       "<text text-anchor=\"start\" x=\"478.5\" y=\"-360\" font-family=\"Courier,monospace\" font-size=\"10.00\">use_distributed_sampler</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-353.5 620.5,-373.5 670.5,-373.5 670.5,-353.5 620.5,-353.5\"/>\n",
       "<text text-anchor=\"start\" x=\"624.5\" y=\"-360\" font-family=\"Courier,monospace\" font-size=\"10.00\">False</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"474.5,-333.5 474.5,-353.5 620.5,-353.5 620.5,-333.5 474.5,-333.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"474.5,-333.5 474.5,-353.5 620.5,-353.5 620.5,-333.5 474.5,-333.5\"/>\n",
       "<text text-anchor=\"start\" x=\"508.5\" y=\"-340\" font-family=\"Courier,monospace\" font-size=\"10.00\">val_check_interval</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"620.5,-333.5 620.5,-353.5 670.5,-353.5 670.5,-333.5 620.5,-333.5\"/>\n",
       "<text text-anchor=\"start\" x=\"624.5\" y=\"-340\" font-family=\"Courier,monospace\" font-size=\"10.00\">2000</text>\n",
       "</g>\n",
       "<!-- 0&#45;&#45;4 -->\n",
       "<g id=\"edge9\" class=\"edge\">\n",
       "<title>0:c&#45;&#45;4:c</title>\n",
       "<path fill=\"none\" stroke=\"#8aacb7\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M719.9,-767.8C712.41,-754.27 689.71,-713.27 664.31,-667.37\"/>\n",
       "</g>\n",
       "<!-- 9 -->\n",
       "<g id=\"node11\" class=\"node\">\n",
       "<title>9</title>\n",
       "<polygon fill=\"#db7093\" stroke=\"none\" points=\"805.5,-540.5 805.5,-560.5 941.5,-560.5 941.5,-540.5 805.5,-540.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"805.5,-540.5 805.5,-560.5 941.5,-560.5 941.5,-540.5 805.5,-540.5\"/>\n",
       "<text text-anchor=\"start\" x=\"824.75\" y=\"-548\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"856.25\" y=\"-548\" font-family=\"Courier,monospace\" font-size=\"10.00\"> NeMoLogger</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"805.5,-520.5 805.5,-540.5 879.5,-540.5 879.5,-520.5 805.5,-520.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"805.5,-520.5 805.5,-540.5 879.5,-540.5 879.5,-520.5 805.5,-520.5\"/>\n",
       "<text text-anchor=\"start\" x=\"851.5\" y=\"-527\" font-family=\"Courier,monospace\" font-size=\"10.00\">name</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"879.5,-520.5 879.5,-540.5 941.5,-540.5 941.5,-520.5 879.5,-520.5\"/>\n",
       "<text text-anchor=\"start\" x=\"883.5\" y=\"-527\" font-family=\"Courier,monospace\" font-size=\"10.00\">&#39;default&#39;</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"805.5,-500.5 805.5,-520.5 879.5,-520.5 879.5,-500.5 805.5,-500.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"805.5,-500.5 805.5,-520.5 879.5,-520.5 879.5,-500.5 805.5,-500.5\"/>\n",
       "<text text-anchor=\"start\" x=\"857.5\" y=\"-507\" font-family=\"Courier,monospace\" font-size=\"10.00\">dir</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"879.5,-500.5 879.5,-520.5 941.5,-520.5 941.5,-500.5 879.5,-500.5\"/>\n",
       "<text text-anchor=\"start\" x=\"883.5\" y=\"-507\" font-family=\"Courier,monospace\" font-size=\"10.00\">None</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"805.5,-480.5 805.5,-500.5 879.5,-500.5 879.5,-480.5 805.5,-480.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"805.5,-480.5 805.5,-500.5 879.5,-500.5 879.5,-480.5 805.5,-480.5\"/>\n",
       "<text text-anchor=\"start\" x=\"851.5\" y=\"-487\" font-family=\"Courier,monospace\" font-size=\"10.00\">ckpt</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"879.5,-480.5 879.5,-500.5 941.5,-500.5 941.5,-480.5 879.5,-480.5\"/>\n",
       "<polygon fill=\"#f0e68c\" stroke=\"none\" points=\"883.5,-484.5 883.5,-496.5 937.5,-496.5 937.5,-484.5 883.5,-484.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"883.5,-484.5 883.5,-496.5 937.5,-496.5 937.5,-484.5 883.5,-484.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"805.5,-460.5 805.5,-480.5 879.5,-480.5 879.5,-460.5 805.5,-460.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"805.5,-460.5 805.5,-480.5 879.5,-480.5 879.5,-460.5 805.5,-460.5\"/>\n",
       "<text text-anchor=\"start\" x=\"809.5\" y=\"-467\" font-family=\"Courier,monospace\" font-size=\"10.00\">tensorboard</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"879.5,-460.5 879.5,-480.5 941.5,-480.5 941.5,-460.5 879.5,-460.5\"/>\n",
       "<polygon fill=\"#32cd32\" stroke=\"none\" points=\"883.5,-464.5 883.5,-476.5 937.5,-476.5 937.5,-464.5 883.5,-464.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"883.5,-464.5 883.5,-476.5 937.5,-476.5 937.5,-464.5 883.5,-464.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"805.5,-440.5 805.5,-460.5 879.5,-460.5 879.5,-440.5 805.5,-440.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"805.5,-440.5 805.5,-460.5 879.5,-460.5 879.5,-440.5 805.5,-440.5\"/>\n",
       "<text text-anchor=\"start\" x=\"845.5\" y=\"-447\" font-family=\"Courier,monospace\" font-size=\"10.00\">wandb</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"879.5,-440.5 879.5,-460.5 941.5,-460.5 941.5,-440.5 879.5,-440.5\"/>\n",
       "<text text-anchor=\"start\" x=\"883.5\" y=\"-447\" font-family=\"Courier,monospace\" font-size=\"10.00\">None</text>\n",
       "</g>\n",
       "<!-- 0&#45;&#45;9 -->\n",
       "<g id=\"edge12\" class=\"edge\">\n",
       "<title>0:c&#45;&#45;9:c</title>\n",
       "<path fill=\"none\" stroke=\"#af5975\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M725.38,-747.71C739.1,-724.9 798.77,-625.73 838.06,-560.41\"/>\n",
       "</g>\n",
       "<!-- 12 -->\n",
       "<g id=\"node13\" class=\"node\">\n",
       "<title>12</title>\n",
       "<polygon fill=\"#00bfff\" stroke=\"none\" points=\"959.5,-510.5 959.5,-530.5 1161.5,-530.5 1161.5,-510.5 959.5,-510.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"959.5,-510.5 959.5,-530.5 1161.5,-530.5 1161.5,-510.5 959.5,-510.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1011.75\" y=\"-518\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"1043.25\" y=\"-518\" font-family=\"Courier,monospace\" font-size=\"10.00\"> AutoResume</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"959.5,-490.5 959.5,-510.5 1129.5,-510.5 1129.5,-490.5 959.5,-490.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"959.5,-490.5 959.5,-510.5 1129.5,-510.5 1129.5,-490.5 959.5,-490.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1029.5\" y=\"-497\" font-family=\"Courier,monospace\" font-size=\"10.00\">resume_if_exists</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1129.5,-490.5 1129.5,-510.5 1161.5,-510.5 1161.5,-490.5 1129.5,-490.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1133.5\" y=\"-497\" font-family=\"Courier,monospace\" font-size=\"10.00\">True</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"959.5,-470.5 959.5,-490.5 1129.5,-490.5 1129.5,-470.5 959.5,-470.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"959.5,-470.5 959.5,-490.5 1129.5,-490.5 1129.5,-470.5 959.5,-470.5\"/>\n",
       "<text text-anchor=\"start\" x=\"963.5\" y=\"-477\" font-family=\"Courier,monospace\" font-size=\"10.00\">resume_ignore_no_checkpoint</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1129.5,-470.5 1129.5,-490.5 1161.5,-490.5 1161.5,-470.5 1129.5,-470.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1133.5\" y=\"-477\" font-family=\"Courier,monospace\" font-size=\"10.00\">True</text>\n",
       "</g>\n",
       "<!-- 0&#45;&#45;12 -->\n",
       "<g id=\"edge13\" class=\"edge\">\n",
       "<title>0:c&#45;&#45;12:c</title>\n",
       "<path fill=\"none\" stroke=\"#0098cb\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M734.4,-731.95C770.45,-729.74 880.24,-718.9 950.5,-667.5 999.19,-631.88 1032.61,-567.69 1048.96,-530.43\"/>\n",
       "</g>\n",
       "<!-- 13 -->\n",
       "<g id=\"node15\" class=\"node\">\n",
       "<title>13</title>\n",
       "<polygon fill=\"#7b68ee\" stroke=\"none\" points=\"1393,-510.5 1393,-530.5 1576,-530.5 1576,-510.5 1393,-510.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1393,-510.5 1393,-530.5 1576,-530.5 1576,-510.5 1393,-510.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1396.75\" y=\"-518\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"1428.25\" y=\"-518\" font-family=\"Courier,monospace\" font-size=\"10.00\"> MegatronOptimizerModule</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1393,-490.5 1393,-510.5 1516,-510.5 1516,-490.5 1393,-490.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1393,-490.5 1393,-510.5 1516,-510.5 1516,-490.5 1393,-490.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1476\" y=\"-497\" font-family=\"Courier,monospace\" font-size=\"10.00\">config</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1516,-490.5 1516,-510.5 1576,-510.5 1576,-490.5 1516,-490.5\"/>\n",
       "<polygon fill=\"#ffc0cb\" stroke=\"none\" points=\"1520,-494.5 1520,-506.5 1572,-506.5 1572,-494.5 1520,-494.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1520,-494.5 1520,-506.5 1572,-506.5 1572,-494.5 1520,-494.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1393,-470.5 1393,-490.5 1516,-490.5 1516,-470.5 1393,-470.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1393,-470.5 1393,-490.5 1516,-490.5 1516,-470.5 1393,-470.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1440\" y=\"-477\" font-family=\"Courier,monospace\" font-size=\"10.00\">lr_scheduler</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1516,-470.5 1516,-490.5 1576,-490.5 1576,-470.5 1516,-470.5\"/>\n",
       "<polygon fill=\"#90ee90\" stroke=\"none\" points=\"1520,-474.5 1520,-486.5 1572,-486.5 1572,-474.5 1520,-474.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1520,-474.5 1520,-486.5 1572,-486.5 1572,-474.5 1520,-474.5\"/>\n",
       "</g>\n",
       "<!-- 0&#45;&#45;13 -->\n",
       "<g id=\"edge16\" class=\"edge\">\n",
       "<title>0:c&#45;&#45;13:c</title>\n",
       "<path fill=\"none\" stroke=\"#6253be\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M734.34,-711.93C796.48,-708.83 1085.19,-693.26 1170.5,-667.5 1274.69,-636.04 1384.47,-569.06 1442.61,-530.48\"/>\n",
       "</g>\n",
       "<!-- 5 -->\n",
       "<g id=\"node5\" class=\"node\">\n",
       "<title>5</title>\n",
       "<polygon fill=\"#ff8c00\" stroke=\"none\" points=\"147,-187.5 147,-207.5 276,-207.5 276,-187.5 147,-187.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"147,-187.5 147,-207.5 276,-207.5 276,-187.5 147,-187.5\"/>\n",
       "<text text-anchor=\"start\" x=\"150.75\" y=\"-195\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"182.25\" y=\"-195\" font-family=\"Courier,monospace\" font-size=\"10.00\"> TimingCallback</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"147,-167.5 147,-187.5 276,-187.5 276,-167.5 147,-167.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"147,-167.5 147,-187.5 276,-187.5 276,-167.5 147,-167.5\"/>\n",
       "<text text-anchor=\"start\" x=\"175.5\" y=\"-175\" font-family=\"Courier,monospace\" font-style=\"italic\" font-size=\"10.00\">no arguments</text>\n",
       "</g>\n",
       "<!-- 4&#45;&#45;5 -->\n",
       "<g id=\"edge4\" class=\"edge\">\n",
       "<title>4:c&#45;&#45;5:c</title>\n",
       "<path fill=\"none\" stroke=\"#cb6f00\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M644.28,-570.78C634.73,-547.93 569.54,-399.11 465.5,-333.5 396.49,-289.98 353.48,-342.61 285.5,-297.5 251.96,-275.24 229.56,-232.07 218.84,-207.26\"/>\n",
       "</g>\n",
       "<!-- 6 -->\n",
       "<g id=\"node8\" class=\"node\">\n",
       "<title>6</title>\n",
       "<polygon fill=\"#8fbc8f\" stroke=\"none\" points=\"294.5,-227.5 294.5,-247.5 496.5,-247.5 496.5,-227.5 294.5,-227.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"294.5,-227.5 294.5,-247.5 496.5,-247.5 496.5,-227.5 294.5,-227.5\"/>\n",
       "<text text-anchor=\"start\" x=\"310.75\" y=\"-235\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"342.25\" y=\"-235\" font-family=\"Courier,monospace\" font-size=\"10.00\"> MegatronMixedPrecision</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"294.5,-207.5 294.5,-227.5 416.5,-227.5 416.5,-207.5 294.5,-207.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"294.5,-207.5 294.5,-227.5 416.5,-227.5 416.5,-207.5 294.5,-207.5\"/>\n",
       "<text text-anchor=\"start\" x=\"358.5\" y=\"-214\" font-family=\"Courier,monospace\" font-size=\"10.00\">precision</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"416.5,-207.5 416.5,-227.5 496.5,-227.5 496.5,-207.5 416.5,-207.5\"/>\n",
       "<text text-anchor=\"start\" x=\"420.5\" y=\"-214\" font-family=\"Courier,monospace\" font-size=\"10.00\">&#39;bf16&#45;mixed&#39;</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"294.5,-187.5 294.5,-207.5 416.5,-207.5 416.5,-187.5 294.5,-187.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"294.5,-187.5 294.5,-207.5 416.5,-207.5 416.5,-187.5 294.5,-187.5\"/>\n",
       "<text text-anchor=\"start\" x=\"340.5\" y=\"-194\" font-family=\"Courier,monospace\" font-size=\"10.00\">params_dtype</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"416.5,-187.5 416.5,-207.5 496.5,-207.5 496.5,-187.5 416.5,-187.5\"/>\n",
       "<polygon fill=\"#adff2f\" stroke=\"none\" points=\"420.5,-191.5 420.5,-203.5 492.5,-203.5 492.5,-191.5 420.5,-191.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"420.5,-191.5 420.5,-203.5 492.5,-203.5 492.5,-191.5 420.5,-191.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"294.5,-167.5 294.5,-187.5 416.5,-187.5 416.5,-167.5 294.5,-167.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"294.5,-167.5 294.5,-187.5 416.5,-187.5 416.5,-167.5 294.5,-167.5\"/>\n",
       "<text text-anchor=\"start\" x=\"328.5\" y=\"-174\" font-family=\"Courier,monospace\" font-size=\"10.00\">pipeline_dtype</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"416.5,-167.5 416.5,-187.5 496.5,-187.5 496.5,-167.5 416.5,-167.5\"/>\n",
       "<polygon fill=\"#adff2f\" stroke=\"none\" points=\"420.5,-171.5 420.5,-183.5 492.5,-183.5 492.5,-171.5 420.5,-171.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"420.5,-171.5 420.5,-183.5 492.5,-183.5 492.5,-171.5 420.5,-171.5\"/>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"294.5,-147.5 294.5,-167.5 416.5,-167.5 416.5,-147.5 294.5,-147.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"294.5,-147.5 294.5,-167.5 416.5,-167.5 416.5,-147.5 294.5,-147.5\"/>\n",
       "<text text-anchor=\"start\" x=\"316.5\" y=\"-154\" font-family=\"Courier,monospace\" font-size=\"10.00\">autocast_enabled</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"416.5,-147.5 416.5,-167.5 496.5,-167.5 496.5,-147.5 416.5,-147.5\"/>\n",
       "<text text-anchor=\"start\" x=\"420.5\" y=\"-154\" font-family=\"Courier,monospace\" font-size=\"10.00\">False</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"294.5,-127.5 294.5,-147.5 416.5,-147.5 416.5,-127.5 294.5,-127.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"294.5,-127.5 294.5,-147.5 416.5,-147.5 416.5,-127.5 294.5,-127.5\"/>\n",
       "<text text-anchor=\"start\" x=\"298.5\" y=\"-134\" font-family=\"Courier,monospace\" font-size=\"10.00\">grad_reduce_in_fp32</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"416.5,-127.5 416.5,-147.5 496.5,-147.5 496.5,-127.5 416.5,-127.5\"/>\n",
       "<text text-anchor=\"start\" x=\"420.5\" y=\"-134\" font-family=\"Courier,monospace\" font-size=\"10.00\">True</text>\n",
       "</g>\n",
       "<!-- 4&#45;&#45;6 -->\n",
       "<g id=\"edge7\" class=\"edge\">\n",
       "<title>4:c&#45;&#45;6:c</title>\n",
       "<path fill=\"none\" stroke=\"#729672\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M638.24,-397.59C617.06,-383.14 553.79,-339.19 505.5,-297.5 487.49,-281.95 468.79,-264.13 452.02,-247.46\"/>\n",
       "</g>\n",
       "<!-- 8 -->\n",
       "<g id=\"node9\" class=\"node\">\n",
       "<title>8</title>\n",
       "<polygon fill=\"#ff6347\" stroke=\"none\" points=\"514.5,-277.5 514.5,-297.5 776.5,-297.5 776.5,-277.5 514.5,-277.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"514.5,-277.5 514.5,-297.5 776.5,-297.5 776.5,-277.5 514.5,-277.5\"/>\n",
       "<text text-anchor=\"start\" x=\"578.75\" y=\"-285\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"610.25\" y=\"-285\" font-family=\"Courier,monospace\" font-size=\"10.00\"> MegatronStrategy</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"514.5,-257.5 514.5,-277.5 738.5,-277.5 738.5,-257.5 514.5,-257.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"514.5,-257.5 514.5,-277.5 738.5,-277.5 738.5,-257.5 514.5,-257.5\"/>\n",
       "<text text-anchor=\"start\" x=\"578.5\" y=\"-264\" font-family=\"Courier,monospace\" font-size=\"10.00\">tensor_model_parallel_size</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"738.5,-257.5 738.5,-277.5 776.5,-277.5 776.5,-257.5 738.5,-257.5\"/>\n",
       "<text text-anchor=\"start\" x=\"742.5\" y=\"-264\" font-family=\"Courier,monospace\" font-size=\"10.00\">1</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"514.5,-237.5 514.5,-257.5 738.5,-257.5 738.5,-237.5 514.5,-237.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"514.5,-237.5 514.5,-257.5 738.5,-257.5 738.5,-237.5 514.5,-237.5\"/>\n",
       "<text text-anchor=\"start\" x=\"566.5\" y=\"-244\" font-family=\"Courier,monospace\" font-size=\"10.00\">pipeline_model_parallel_size</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"738.5,-237.5 738.5,-257.5 776.5,-257.5 776.5,-237.5 738.5,-237.5\"/>\n",
       "<text text-anchor=\"start\" x=\"742.5\" y=\"-244\" font-family=\"Courier,monospace\" font-size=\"10.00\">1</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"514.5,-217.5 514.5,-237.5 738.5,-237.5 738.5,-217.5 514.5,-217.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"514.5,-217.5 514.5,-237.5 738.5,-237.5 738.5,-217.5 514.5,-217.5\"/>\n",
       "<text text-anchor=\"start\" x=\"518.5\" y=\"-224\" font-family=\"Courier,monospace\" font-size=\"10.00\">virtual_pipeline_model_parallel_size</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"738.5,-217.5 738.5,-237.5 776.5,-237.5 776.5,-217.5 738.5,-217.5\"/>\n",
       "<text text-anchor=\"start\" x=\"742.5\" y=\"-224\" font-family=\"Courier,monospace\" font-size=\"10.00\">None</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"514.5,-197.5 514.5,-217.5 738.5,-217.5 738.5,-197.5 514.5,-197.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"514.5,-197.5 514.5,-217.5 738.5,-217.5 738.5,-197.5 514.5,-197.5\"/>\n",
       "<text text-anchor=\"start\" x=\"608.5\" y=\"-204\" font-family=\"Courier,monospace\" font-size=\"10.00\">context_parallel_size</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"738.5,-197.5 738.5,-217.5 776.5,-217.5 776.5,-197.5 738.5,-197.5\"/>\n",
       "<text text-anchor=\"start\" x=\"742.5\" y=\"-204\" font-family=\"Courier,monospace\" font-size=\"10.00\">2</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"514.5,-177.5 514.5,-197.5 738.5,-197.5 738.5,-177.5 514.5,-177.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"514.5,-177.5 514.5,-197.5 738.5,-197.5 738.5,-177.5 514.5,-177.5\"/>\n",
       "<text text-anchor=\"start\" x=\"632.5\" y=\"-184\" font-family=\"Courier,monospace\" font-size=\"10.00\">sequence_parallel</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"738.5,-177.5 738.5,-197.5 776.5,-197.5 776.5,-177.5 738.5,-177.5\"/>\n",
       "<text text-anchor=\"start\" x=\"742.5\" y=\"-184\" font-family=\"Courier,monospace\" font-size=\"10.00\">False</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"514.5,-157.5 514.5,-177.5 738.5,-177.5 738.5,-157.5 514.5,-157.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"514.5,-157.5 514.5,-177.5 738.5,-177.5 738.5,-157.5 514.5,-157.5\"/>\n",
       "<text text-anchor=\"start\" x=\"602.5\" y=\"-164\" font-family=\"Courier,monospace\" font-size=\"10.00\">ckpt_include_optimizer</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"738.5,-157.5 738.5,-177.5 776.5,-177.5 776.5,-157.5 738.5,-157.5\"/>\n",
       "<text text-anchor=\"start\" x=\"742.5\" y=\"-164\" font-family=\"Courier,monospace\" font-size=\"10.00\">True</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"514.5,-137.5 514.5,-157.5 738.5,-157.5 738.5,-137.5 514.5,-137.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"514.5,-137.5 514.5,-157.5 738.5,-157.5 738.5,-137.5 514.5,-137.5\"/>\n",
       "<text text-anchor=\"start\" x=\"650.5\" y=\"-144\" font-family=\"Courier,monospace\" font-size=\"10.00\">pipeline_dtype</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"738.5,-137.5 738.5,-157.5 776.5,-157.5 776.5,-137.5 738.5,-137.5\"/>\n",
       "<text text-anchor=\"start\" x=\"742.5\" y=\"-144\" font-family=\"Courier,monospace\" font-size=\"10.00\">None</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"514.5,-117.5 514.5,-137.5 738.5,-137.5 738.5,-117.5 514.5,-117.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"514.5,-117.5 514.5,-137.5 738.5,-137.5 738.5,-117.5 514.5,-117.5\"/>\n",
       "<text text-anchor=\"start\" x=\"644.5\" y=\"-124\" font-family=\"Courier,monospace\" font-size=\"10.00\">ckpt_async_save</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"738.5,-117.5 738.5,-137.5 776.5,-137.5 776.5,-117.5 738.5,-117.5\"/>\n",
       "<text text-anchor=\"start\" x=\"742.5\" y=\"-124\" font-family=\"Courier,monospace\" font-size=\"10.00\">True</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"514.5,-97.5 514.5,-117.5 738.5,-117.5 738.5,-97.5 514.5,-97.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"514.5,-97.5 514.5,-117.5 738.5,-117.5 738.5,-97.5 514.5,-97.5\"/>\n",
       "<text text-anchor=\"start\" x=\"626.5\" y=\"-104\" font-family=\"Courier,monospace\" font-size=\"10.00\">ckpt_parallel_load</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"738.5,-97.5 738.5,-117.5 776.5,-117.5 776.5,-97.5 738.5,-97.5\"/>\n",
       "<text text-anchor=\"start\" x=\"742.5\" y=\"-104\" font-family=\"Courier,monospace\" font-size=\"10.00\">True</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"514.5,-77.5 514.5,-97.5 738.5,-97.5 738.5,-77.5 514.5,-77.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"514.5,-77.5 514.5,-97.5 738.5,-97.5 738.5,-77.5 514.5,-77.5\"/>\n",
       "<text text-anchor=\"start\" x=\"596.5\" y=\"-84\" font-family=\"Courier,monospace\" font-size=\"10.00\">gradient_as_bucket_view</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"738.5,-77.5 738.5,-97.5 776.5,-97.5 776.5,-77.5 738.5,-77.5\"/>\n",
       "<text text-anchor=\"start\" x=\"742.5\" y=\"-84\" font-family=\"Courier,monospace\" font-size=\"10.00\">True</text>\n",
       "</g>\n",
       "<!-- 4&#45;&#45;8 -->\n",
       "<g id=\"edge8\" class=\"edge\">\n",
       "<title>4:c&#45;&#45;8:c</title>\n",
       "<path fill=\"none\" stroke=\"#cb4f38\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M645.5,-377.7C645.5,-365.66 645.5,-332.55 645.5,-297.33\"/>\n",
       "</g>\n",
       "<!-- 7 -->\n",
       "<g id=\"node7\" class=\"node\">\n",
       "<title>7</title>\n",
       "<polygon fill=\"#adff2f\" stroke=\"none\" points=\"410.5,-20.75 410.5,-41.5 502.5,-41.5 502.5,-20.75 410.5,-20.75\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"410.5,-20.75 410.5,-41.5 502.5,-41.5 502.5,-20.75 410.5,-20.75\"/>\n",
       "<text text-anchor=\"start\" x=\"441.5\" y=\"-28\" font-family=\"Courier,monospace\" font-size=\"10.00\">dtype</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"410.5,0 410.5,-20.75 502.5,-20.75 502.5,0 410.5,0\"/>\n",
       "<text text-anchor=\"start\" x=\"414.5\" y=\"-7.25\" font-family=\"Courier,monospace\" font-size=\"10.00\">torch.bfloat16</text>\n",
       "</g>\n",
       "<!-- 6&#45;&#45;7 -->\n",
       "<g id=\"edge5\" class=\"edge\">\n",
       "<title>6:c&#45;&#45;7:c</title>\n",
       "<path fill=\"none\" stroke=\"#8acb25\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M456.5,-191.83C456.5,-170.44 456.5,-81.25 456.5,-41.28\"/>\n",
       "</g>\n",
       "<!-- 6&#45;&#45;7 -->\n",
       "<g id=\"edge6\" class=\"edge\">\n",
       "<title>6:c&#45;&#45;7:c</title>\n",
       "<path fill=\"none\" stroke=\"#8acb25\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M456.5,-171.7C456.5,-152.06 456.5,-77.17 456.5,-41.23\"/>\n",
       "</g>\n",
       "<!-- 10 -->\n",
       "<g id=\"node10\" class=\"node\">\n",
       "<title>10</title>\n",
       "<polygon fill=\"#f0e68c\" stroke=\"none\" points=\"794.5,-227.5 794.5,-247.5 1260.5,-247.5 1260.5,-227.5 794.5,-227.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"794.5,-227.5 794.5,-247.5 1260.5,-247.5 1260.5,-227.5 794.5,-227.5\"/>\n",
       "<text text-anchor=\"start\" x=\"963.75\" y=\"-235\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"995.25\" y=\"-235\" font-family=\"Courier,monospace\" font-size=\"10.00\"> ModelCheckpoint</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"794.5,-207.5 794.5,-227.5 916.5,-227.5 916.5,-207.5 794.5,-207.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"794.5,-207.5 794.5,-227.5 916.5,-227.5 916.5,-207.5 794.5,-207.5\"/>\n",
       "<text text-anchor=\"start\" x=\"858.5\" y=\"-214\" font-family=\"Courier,monospace\" font-size=\"10.00\">save_last</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"916.5,-207.5 916.5,-227.5 1260.5,-227.5 1260.5,-207.5 916.5,-207.5\"/>\n",
       "<text text-anchor=\"start\" x=\"920.5\" y=\"-214\" font-family=\"Courier,monospace\" font-size=\"10.00\">True</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"794.5,-187.5 794.5,-207.5 916.5,-207.5 916.5,-187.5 794.5,-187.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"794.5,-187.5 794.5,-207.5 916.5,-207.5 916.5,-187.5 794.5,-187.5\"/>\n",
       "<text text-anchor=\"start\" x=\"852.5\" y=\"-194\" font-family=\"Courier,monospace\" font-size=\"10.00\">save_top_k</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"916.5,-187.5 916.5,-207.5 1260.5,-207.5 1260.5,-187.5 916.5,-187.5\"/>\n",
       "<text text-anchor=\"start\" x=\"920.5\" y=\"-194\" font-family=\"Courier,monospace\" font-size=\"10.00\">10</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"794.5,-167.5 794.5,-187.5 916.5,-187.5 916.5,-167.5 794.5,-167.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"794.5,-167.5 794.5,-187.5 916.5,-187.5 916.5,-167.5 794.5,-167.5\"/>\n",
       "<text text-anchor=\"start\" x=\"798.5\" y=\"-174\" font-family=\"Courier,monospace\" font-size=\"10.00\">every_n_train_steps</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"916.5,-167.5 916.5,-187.5 1260.5,-187.5 1260.5,-167.5 916.5,-167.5\"/>\n",
       "<text text-anchor=\"start\" x=\"920.5\" y=\"-174\" font-family=\"Courier,monospace\" font-size=\"10.00\">200</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"794.5,-147.5 794.5,-167.5 916.5,-167.5 916.5,-147.5 794.5,-147.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"794.5,-147.5 794.5,-167.5 916.5,-167.5 916.5,-147.5 794.5,-147.5\"/>\n",
       "<text text-anchor=\"start\" x=\"822.5\" y=\"-154\" font-family=\"Courier,monospace\" font-size=\"10.00\">save_best_model</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"916.5,-147.5 916.5,-167.5 1260.5,-167.5 1260.5,-147.5 916.5,-147.5\"/>\n",
       "<text text-anchor=\"start\" x=\"920.5\" y=\"-154\" font-family=\"Courier,monospace\" font-size=\"10.00\">False</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"794.5,-127.5 794.5,-147.5 916.5,-147.5 916.5,-127.5 794.5,-127.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"794.5,-127.5 794.5,-147.5 916.5,-147.5 916.5,-127.5 794.5,-127.5\"/>\n",
       "<text text-anchor=\"start\" x=\"864.5\" y=\"-134\" font-family=\"Courier,monospace\" font-size=\"10.00\">filename</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"916.5,-127.5 916.5,-147.5 1260.5,-147.5 1260.5,-127.5 916.5,-127.5\"/>\n",
       "<text text-anchor=\"start\" x=\"920.5\" y=\"-134\" font-family=\"Courier,monospace\" font-size=\"10.00\">&#39;{model_name}&#45;&#45;{val_loss:.2f}&#45;{step}&#45;{consumed_samples}&#39;</text>\n",
       "</g>\n",
       "<!-- 9&#45;&#45;10 -->\n",
       "<g id=\"edge10\" class=\"edge\">\n",
       "<title>9:c&#45;&#45;10:c</title>\n",
       "<path fill=\"none\" stroke=\"#bfb770\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M911.33,-484.52C914.71,-464.84 928.28,-390.86 950.5,-333.5 961.88,-304.13 977.78,-273.1 992.18,-247.31\"/>\n",
       "</g>\n",
       "<!-- 11 -->\n",
       "<g id=\"node12\" class=\"node\">\n",
       "<title>11</title>\n",
       "<polygon fill=\"#32cd32\" stroke=\"none\" points=\"1279,-197.5 1279,-217.5 1426,-217.5 1426,-197.5 1279,-197.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1279,-197.5 1279,-217.5 1426,-217.5 1426,-197.5 1279,-197.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1282.75\" y=\"-205\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"1314.25\" y=\"-205\" font-family=\"Courier,monospace\" font-size=\"10.00\"> TensorBoardLogger</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1279,-177.5 1279,-197.5 1349,-197.5 1349,-177.5 1279,-177.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1279,-177.5 1279,-197.5 1349,-197.5 1349,-177.5 1279,-177.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1297\" y=\"-184\" font-family=\"Courier,monospace\" font-size=\"10.00\">save_dir</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1349,-177.5 1349,-197.5 1426,-197.5 1426,-177.5 1349,-177.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1353\" y=\"-184\" font-family=\"Courier,monospace\" font-size=\"10.00\">&#39;tb_logs&#39;</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1279,-157.5 1279,-177.5 1349,-177.5 1349,-157.5 1279,-157.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1279,-157.5 1279,-177.5 1349,-177.5 1349,-157.5 1279,-157.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1321\" y=\"-164\" font-family=\"Courier,monospace\" font-size=\"10.00\">name</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1349,-157.5 1349,-177.5 1426,-177.5 1426,-157.5 1349,-157.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1353\" y=\"-164\" font-family=\"Courier,monospace\" font-size=\"10.00\">&#39;default&#39;</text>\n",
       "</g>\n",
       "<!-- 9&#45;&#45;11 -->\n",
       "<g id=\"edge11\" class=\"edge\">\n",
       "<title>9:c&#45;&#45;11:c</title>\n",
       "<path fill=\"none\" stroke=\"#28a328\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M910.2,-464.53C909.3,-444.42 908.85,-368.48 950.5,-333.5 1059.76,-241.74 1145.41,-367.92 1269.5,-297.5 1301.77,-279.19 1325.53,-242.72 1339.17,-217.02\"/>\n",
       "</g>\n",
       "<!-- 14 -->\n",
       "<g id=\"node14\" class=\"node\">\n",
       "<title>14</title>\n",
       "<polygon fill=\"#ffc0cb\" stroke=\"none\" points=\"1444.5,-277.5 1444.5,-297.5 1646.5,-297.5 1646.5,-277.5 1444.5,-277.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1444.5,-277.5 1444.5,-297.5 1646.5,-297.5 1646.5,-277.5 1444.5,-277.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1481.75\" y=\"-285\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"1513.25\" y=\"-285\" font-family=\"Courier,monospace\" font-size=\"10.00\"> OptimizerConfig</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1444.5,-257.5 1444.5,-277.5 1602.5,-277.5 1602.5,-257.5 1444.5,-257.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1444.5,-257.5 1444.5,-277.5 1602.5,-277.5 1602.5,-257.5 1444.5,-257.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1544.5\" y=\"-264\" font-family=\"Courier,monospace\" font-size=\"10.00\">optimizer</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1602.5,-257.5 1602.5,-277.5 1646.5,-277.5 1646.5,-257.5 1602.5,-257.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1606.5\" y=\"-264\" font-family=\"Courier,monospace\" font-size=\"10.00\">&#39;adam&#39;</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1444.5,-237.5 1444.5,-257.5 1602.5,-257.5 1602.5,-237.5 1444.5,-237.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1444.5,-237.5 1444.5,-257.5 1602.5,-257.5 1602.5,-237.5 1444.5,-237.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1586.5\" y=\"-244\" font-family=\"Courier,monospace\" font-size=\"10.00\">lr</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1602.5,-237.5 1602.5,-257.5 1646.5,-257.5 1646.5,-237.5 1602.5,-237.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1606.5\" y=\"-244\" font-family=\"Courier,monospace\" font-size=\"10.00\">0.0003</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1444.5,-217.5 1444.5,-237.5 1602.5,-237.5 1602.5,-217.5 1444.5,-217.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1444.5,-217.5 1444.5,-237.5 1602.5,-237.5 1602.5,-217.5 1444.5,-217.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1526.5\" y=\"-224\" font-family=\"Courier,monospace\" font-size=\"10.00\">weight_decay</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1602.5,-217.5 1602.5,-237.5 1646.5,-237.5 1646.5,-217.5 1602.5,-217.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1606.5\" y=\"-224\" font-family=\"Courier,monospace\" font-size=\"10.00\">0.1</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1444.5,-197.5 1444.5,-217.5 1602.5,-217.5 1602.5,-197.5 1444.5,-197.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1444.5,-197.5 1444.5,-217.5 1602.5,-217.5 1602.5,-197.5 1444.5,-197.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1574.5\" y=\"-204\" font-family=\"Courier,monospace\" font-size=\"10.00\">bf16</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1602.5,-197.5 1602.5,-217.5 1646.5,-217.5 1646.5,-197.5 1602.5,-197.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1606.5\" y=\"-204\" font-family=\"Courier,monospace\" font-size=\"10.00\">True</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1444.5,-177.5 1444.5,-197.5 1602.5,-197.5 1602.5,-177.5 1444.5,-177.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1444.5,-177.5 1444.5,-197.5 1602.5,-197.5 1602.5,-177.5 1444.5,-177.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1538.5\" y=\"-184\" font-family=\"Courier,monospace\" font-size=\"10.00\">adam_beta1</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1602.5,-177.5 1602.5,-197.5 1646.5,-197.5 1646.5,-177.5 1602.5,-177.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1606.5\" y=\"-184\" font-family=\"Courier,monospace\" font-size=\"10.00\">0.9</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1444.5,-157.5 1444.5,-177.5 1602.5,-177.5 1602.5,-157.5 1444.5,-157.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1444.5,-157.5 1444.5,-177.5 1602.5,-177.5 1602.5,-157.5 1444.5,-157.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1538.5\" y=\"-164\" font-family=\"Courier,monospace\" font-size=\"10.00\">adam_beta2</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1602.5,-157.5 1602.5,-177.5 1646.5,-177.5 1646.5,-157.5 1602.5,-157.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1606.5\" y=\"-164\" font-family=\"Courier,monospace\" font-size=\"10.00\">0.95</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1444.5,-137.5 1444.5,-157.5 1602.5,-157.5 1602.5,-137.5 1444.5,-137.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1444.5,-137.5 1444.5,-157.5 1602.5,-157.5 1602.5,-137.5 1444.5,-137.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1550.5\" y=\"-144\" font-family=\"Courier,monospace\" font-size=\"10.00\">adam_eps</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1602.5,-137.5 1602.5,-157.5 1646.5,-157.5 1646.5,-137.5 1602.5,-137.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1606.5\" y=\"-144\" font-family=\"Courier,monospace\" font-size=\"10.00\">1e&#45;05</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1444.5,-117.5 1444.5,-137.5 1602.5,-137.5 1602.5,-117.5 1444.5,-117.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1444.5,-117.5 1444.5,-137.5 1602.5,-137.5 1602.5,-117.5 1444.5,-117.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1448.5\" y=\"-124\" font-family=\"Courier,monospace\" font-size=\"10.00\">use_distributed_optimizer</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1602.5,-117.5 1602.5,-137.5 1646.5,-137.5 1646.5,-117.5 1602.5,-117.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1606.5\" y=\"-124\" font-family=\"Courier,monospace\" font-size=\"10.00\">True</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1444.5,-97.5 1444.5,-117.5 1602.5,-117.5 1602.5,-97.5 1444.5,-97.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1444.5,-97.5 1444.5,-117.5 1602.5,-117.5 1602.5,-97.5 1444.5,-97.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1484.5\" y=\"-104\" font-family=\"Courier,monospace\" font-size=\"10.00\">overlap_grad_reduce</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1602.5,-97.5 1602.5,-117.5 1646.5,-117.5 1646.5,-97.5 1602.5,-97.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1606.5\" y=\"-104\" font-family=\"Courier,monospace\" font-size=\"10.00\">True</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1444.5,-77.5 1444.5,-97.5 1602.5,-97.5 1602.5,-77.5 1444.5,-77.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1444.5,-77.5 1444.5,-97.5 1602.5,-97.5 1602.5,-77.5 1444.5,-77.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1478.5\" y=\"-84\" font-family=\"Courier,monospace\" font-size=\"10.00\">overlap_param_gather</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1602.5,-77.5 1602.5,-97.5 1646.5,-97.5 1646.5,-77.5 1602.5,-77.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1606.5\" y=\"-84\" font-family=\"Courier,monospace\" font-size=\"10.00\">True</text>\n",
       "</g>\n",
       "<!-- 13&#45;&#45;14 -->\n",
       "<g id=\"edge14\" class=\"edge\">\n",
       "<title>13:c&#45;&#45;14:c</title>\n",
       "<path fill=\"none\" stroke=\"#cb99a2\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M1545.99,-494.61C1545.96,-472.09 1545.8,-375.99 1545.68,-297.39\"/>\n",
       "</g>\n",
       "<!-- 15 -->\n",
       "<g id=\"node16\" class=\"node\">\n",
       "<title>15</title>\n",
       "<polygon fill=\"#90ee90\" stroke=\"none\" points=\"1664.5,-207.5 1664.5,-227.5 1896.5,-227.5 1896.5,-207.5 1664.5,-207.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1664.5,-207.5 1664.5,-227.5 1896.5,-227.5 1896.5,-207.5 1664.5,-207.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1689.75\" y=\"-215\" font-family=\"Courier,monospace\" font-size=\"8.00\">Config:</text>\n",
       "<text text-anchor=\"start\" x=\"1721.25\" y=\"-215\" font-family=\"Courier,monospace\" font-size=\"10.00\"> CosineAnnealingScheduler</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1664.5,-187.5 1664.5,-207.5 1756.5,-207.5 1756.5,-187.5 1664.5,-187.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1664.5,-187.5 1664.5,-207.5 1756.5,-207.5 1756.5,-187.5 1664.5,-187.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1680.5\" y=\"-194\" font-family=\"Courier,monospace\" font-size=\"10.00\">warmup_steps</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1756.5,-187.5 1756.5,-207.5 1896.5,-207.5 1896.5,-187.5 1756.5,-187.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1760.5\" y=\"-194\" font-family=\"Courier,monospace\" font-size=\"10.00\">2000</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1664.5,-167.5 1664.5,-187.5 1756.5,-187.5 1756.5,-167.5 1664.5,-167.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1664.5,-167.5 1664.5,-187.5 1756.5,-187.5 1756.5,-167.5 1664.5,-167.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1668.5\" y=\"-174\" font-family=\"Courier,monospace\" font-size=\"10.00\">constant_steps</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1756.5,-167.5 1756.5,-187.5 1896.5,-187.5 1896.5,-167.5 1756.5,-167.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1760.5\" y=\"-174\" font-family=\"Courier,monospace\" font-size=\"10.00\">0</text>\n",
       "<polygon fill=\"#eeeeee\" stroke=\"none\" points=\"1664.5,-147.5 1664.5,-167.5 1756.5,-167.5 1756.5,-147.5 1664.5,-147.5\"/>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1664.5,-147.5 1664.5,-167.5 1756.5,-167.5 1756.5,-147.5 1664.5,-147.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1716.5\" y=\"-154\" font-family=\"Courier,monospace\" font-size=\"10.00\">min_lr</text>\n",
       "<polygon fill=\"none\" stroke=\"black\" points=\"1756.5,-147.5 1756.5,-167.5 1896.5,-167.5 1896.5,-147.5 1756.5,-147.5\"/>\n",
       "<text text-anchor=\"start\" x=\"1760.5\" y=\"-154\" font-family=\"Courier,monospace\" font-size=\"10.00\">2.9999999999999997e&#45;05</text>\n",
       "</g>\n",
       "<!-- 13&#45;&#45;15 -->\n",
       "<g id=\"edge15\" class=\"edge\">\n",
       "<title>13:c&#45;&#45;15:c</title>\n",
       "<path fill=\"none\" stroke=\"#73be73\" stroke-width=\"3\" stroke-opacity=\"0.501961\" d=\"M1549.68,-474.93C1572.12,-447.09 1691.32,-299.16 1749.08,-227.49\"/>\n",
       "</g>\n",
       "</g>\n",
       "</svg>\n"
      ],
      "text/plain": [
       "<Partial[pretrain(\n",
       "  model=<Config[LlamaModel(config=<Config[Llama3Config8B()]>)]>,\n",
       "  data=<Config[MockDataModule(seq_length=8192, micro_batch_size=1, global_batch_size=512)]>,\n",
       "  trainer=<Config[Trainer(\n",
       "    accelerator='gpu',\n",
       "    accumulate_grad_batches=1,\n",
       "    callbacks=[<Config[TimingCallback()]>],\n",
       "    devices=8,\n",
       "    gradient_clip_val=1.0,\n",
       "    limit_test_batches=50,\n",
       "    limit_val_batches=32,\n",
       "    log_every_n_steps=10,\n",
       "    max_steps=1168251,\n",
       "    num_nodes=1,\n",
       "    plugins=<Config[MegatronMixedPrecision(\n",
       "      precision='bf16-mixed',\n",
       "      params_dtype=torch.bfloat16,\n",
       "      pipeline_dtype=torch.bfloat16,\n",
       "      autocast_enabled=False,\n",
       "      grad_reduce_in_fp32=True)]>,\n",
       "    strategy=<Config[MegatronStrategy(\n",
       "      tensor_model_parallel_size=1,\n",
       "      pipeline_model_parallel_size=1,\n",
       "      virtual_pipeline_model_parallel_size=None,\n",
       "      context_parallel_size=2,\n",
       "      sequence_parallel=False,\n",
       "      ckpt_include_optimizer=True,\n",
       "      pipeline_dtype=None,\n",
       "      ckpt_async_save=True,\n",
       "      ckpt_parallel_load=True,\n",
       "      gradient_as_bucket_view=True)]>,\n",
       "    use_distributed_sampler=False,\n",
       "    val_check_interval=2000)]>,\n",
       "  log=<Config[NeMoLogger(\n",
       "    name='default',\n",
       "    dir=None,\n",
       "    ckpt=<Config[ModelCheckpoint(\n",
       "      save_last=True,\n",
       "      save_top_k=10,\n",
       "      every_n_train_steps=200,\n",
       "      save_best_model=False,\n",
       "      filename='{model_name}--{val_loss:.2f}-{step}-{consumed_samples}')]>,\n",
       "    tensorboard=<Config[TensorBoardLogger(save_dir='tb_logs', name='default')]>,\n",
       "    wandb=None)]>,\n",
       "  resume=<Config[AutoResume(resume_if_exists=True, resume_ignore_no_checkpoint=True)]>,\n",
       "  optim=<Config[MegatronOptimizerModule(\n",
       "    config=<Config[OptimizerConfig(\n",
       "      optimizer='adam',\n",
       "      lr=0.0003,\n",
       "      weight_decay=0.1,\n",
       "      bf16=True,\n",
       "      adam_beta1=0.9,\n",
       "      adam_beta2=0.95,\n",
       "      adam_eps=1e-05,\n",
       "      use_distributed_optimizer=True,\n",
       "      overlap_grad_reduce=True,\n",
       "      overlap_param_gather=True)]>,\n",
       "    lr_scheduler=<Config[CosineAnnealingScheduler(\n",
       "      warmup_steps=2000,\n",
       "      constant_steps=0,\n",
       "      min_lr=2.9999999999999997e-05)]>)]>)]>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the Llama3-8B pretraining recipe, sized for one node with 8 GPUs.\n",
    "pretrain = llama3_8b.pretrain_recipe(\n",
    "    num_nodes=1,\n",
    "    num_gpus_per_node=8,\n",
    ")\n",
    "\n",
    "# Leave the recipe as the cell's last expression so the notebook displays it\n",
    "# (rendered below as a config graph plus a plain-text repr).\n",
    "pretrain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
